/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.distributions;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import mosdi.fa.GeneralizedString;
import mosdi.util.BitArray;
import mosdi.util.Log;

/** Represents an inhomogeneous distribution over an alphabet. */
public class InhomogeneousDistribution {
	/** The contained character distributions in no particular order. */
	private List<double[]> distributions;
	// TODO: use runlength encoding to gain space efficiency
	/** For each position, the index of the corresponding distribution
	 *  is listed, i.e. distributions.get(positions[i]) gives the distribution
	 *  at position i. */
	private int[] positions;
	private int alphabetSize;
	/** Mapping between hashcodes (key) of distributions and their position (value)
	 *  in the list of distributions. */
	private HashMap<Integer,Integer> distributionMap;
	
	public InhomogeneousDistribution(int length, double[] baseDistribution) {
		alphabetSize = baseDistribution.length;
		distributions = new ArrayList<double[]>();
		distributions.add(baseDistribution);
		positions = new int[length];
		distributionMap = new HashMap<Integer,Integer>();
		distributionMap.put(Arrays.hashCode(baseDistribution), 0);
	}
	
	/** Create inhomogeneous distribution specified by a list of character distributions 
	 *  and a mapping that contains for every position the index of the character 
	 *  distribution in the list. */
	public InhomogeneousDistribution(List<double[]> distributions, int[] positions) {
		if (distributions.isEmpty()) throw new IllegalArgumentException("List of distributions is empty!");
		alphabetSize = distributions.get(0).length;
		this.distributions = distributions;
		this.positions = positions;
		distributionMap = new HashMap<Integer,Integer>();
		int n = 0;
		for (double[] dist : distributions) {
			if (dist.length!=alphabetSize) throw new IllegalArgumentException("Invalid distribution length!");
			distributionMap.put(Arrays.hashCode(dist), n);
			++n;
		}
	}
	
	public double[] getDistribution(int position) { return distributions.get(positions[position]); }
	
	public double[] getDistributionByIndex(int index) { return distributions.get(index); }
	
	/** Returns an array that contains for position the index of the character distribution 
	 *  at that position. */
	public int[] getPositionArray() { return positions;	}
	
	/** Returns a list of all character distributions occuring in this inhomogeneous distribution. 
	 *  The list is ordered by the index, i.e. getDistributionList().get(i) = getDistributionByIndex(i).
	 */
	public List<double[]> getDistributionList() { return distributions; }
	
	public void setDistribution(int position, double[] dist) {
		if (dist.length!=alphabetSize) {
			throw new IllegalArgumentException("Invalid number of characters in distribution.");
		}
		// distribution already known?
		int hashCode = Arrays.hashCode(dist);
		Integer i = distributionMap.get(hashCode);
		if (i!=null) {
			positions[position]=i;
		} else {
			i = distributions.size();
			distributions.add(dist);
			distributionMap.put(hashCode,i);
			positions[position]=i;
		}
	}

	/** Restricts the distribution at the given position to those characters
	 *  given by the parameter "restriction". */
	public void restrictDistribution(int position, BitArray restriction) {
		if (restriction.size()!=alphabetSize) {
			throw new IllegalArgumentException("Invalid number of characters in restriction.");
		}
		double[] oldDistribution = distributions.get(positions[position]); 
		double[] newDistribution = new double[alphabetSize];
		// calculate normalization factor
		double n = 0.0;
		for (int c=0; c<alphabetSize; ++c) {
			if (restriction.get(c)) n+=oldDistribution[c];
		}
		// fill in non-zero entries of new distribution
		for (int c=0; c<alphabetSize; ++c) {
			if (restriction.get(c)) newDistribution[c]=oldDistribution[c]/n;
		}
		setDistribution(position, newDistribution);
	}
	
	public class QGram {
		private int[] qGram;
		private int count;
		
		private QGram(int[] qGram) {
			this.count=1;
			this.qGram=qGram;
		}
	}

	public QGram[] getQGrams(int q) {
		Map<Integer,QGram> map = new HashMap<Integer,QGram>();
		for (int i=0; i<positions.length-q+1; ++i) {
			int[] s = new int[q];
			for (int j=0; j<q; ++j) {
				s[j]=positions[i+j];
			}
			int hash = Arrays.hashCode(s);
			QGram qGram = map.get(hash);
			if (qGram!=null) {
				qGram.count+=1;
			} else {
				qGram = new QGram(s);
				map.put(hash, qGram);
			}
		}
		QGram[] result = new QGram[map.size()];
		int n = 0;
		for (QGram qGram : map.values()) result[n++]=qGram;
		return result;
	}
	
//	public double calcExpectation(GeneralizedString gs) {
//		if (alphabetSize!=gs.getAlphabet().size()) throw new IllegalArgumentException();
//		double expectation = 0.0;
//		for (int i=0; i<=positions.length-gs.length(); ++i) {
//			double pLocal = 1.0;
//			for (int j=0; j<gs.length(); ++j) {
//				double[] dist = distributions.get(positions[i+j]);
//				double pChar = 0.0;
//				BitArray charSet = gs.getPosition(j);
//				for (int c=0; c<alphabetSize; ++c) {
//					if (!charSet.get(c)) continue;
//					pChar+=dist[c];
//				}
//				pLocal*=pChar;
//			}
//			expectation+=pLocal;
//		}
//		return expectation;
//	}
	
	public double calcExpectation(GeneralizedString gs, QGram[] qGrams) {
		double expectation = 0.0;
		for (QGram qGram : qGrams) {
			if (qGram.qGram.length!=gs.length()) throw new IllegalArgumentException();
			double pLocal = 1.0;
			for (int i=0; i<gs.length(); ++i) {
				double[] dist = distributions.get(qGram.qGram[i]);
				double pChar = 0.0;
				BitArray charSet = gs.getPosition(i);
				for (int c=0; c<alphabetSize; ++c) {
					if (!charSet.get(c)) continue;
					pChar+=dist[c];
				}
				pLocal*=pChar;
			}
			expectation+=pLocal*qGram.count;
		}
		return expectation;
	}
	
	public String toString() {
		StringBuilder sb = new StringBuilder();
		// print the length
		sb.append(String.format("LEN %d\n", positions.length));
		// print distributions and mark lines with a leading D
		for (double[] dist : distributions) {
			sb.append("DIST ");
			for (int i=0; i<alphabetSize; ++i) {
				sb.append(String.format("%e ", dist[i]));
			}
			sb.append("\n");
		}
		// print sequence of distributions in a runlength encoded way: \
		// <dist_idx>[,<multiplicity>];<dist_idx>[,<multiplicity>];...
		sb.append("POS ");
		int last = positions[0];
		int multiplicity = 0;
		for (int i=0; i<positions.length; ++i) {
			if (positions[i]==last) {
				multiplicity+=1;
			} else {
				if (multiplicity==1) {
					sb.append(String.format("%d;", last));
				} else {
					sb.append(String.format("%d,%d;", last, multiplicity));
				}
				last = positions[i];
				multiplicity = 1;
			}
		}
		if (multiplicity==1) {
			sb.append(String.format("%d", last));
		} else {
			sb.append(String.format("%d,%d", last, multiplicity));
		}
		return sb.toString();
	}

	// TODO: Write stuff returned by toString() to file.
//	public void writeToFile(String filename) {
//		
//	}
	
	public static InhomogeneousDistribution readFromFile(String filename) {
		FileInputStream file = null;
		try {
			file = new FileInputStream(filename);
		} catch (FileNotFoundException e) {
			Log.errorln("File with inhomogeneous distribution not found, sorry!");
			System.exit(1);
		}
		BufferedReader br = new BufferedReader(new InputStreamReader(file));
		
		List<double[]> distributions = new ArrayList<double[]>();
		int[] positions = null;
		int alphabetSize = -1;
		try {
			while (true) {
				String line = br.readLine();
				if (line==null) break;
				// strip comment
				int n = line.indexOf('#');
				if (n>=0) line = line.substring(0, n);
				if (line.startsWith("LEN ")) {
					String remainder = line.substring(4);
					positions = new int[Integer.parseInt(remainder)];
				}
				if (line.startsWith("DIST ")) {
					// if alphabet size not known ...
					if (alphabetSize==-1) {
						// ... determine number of entries, i.e. the alphabet size
						StringTokenizer st = new StringTokenizer(line.substring(5)," ",false);
						alphabetSize = 0;
						while (st.hasMoreTokens()) { ++alphabetSize; st.nextToken(); }
					}
					double[] dist = new double[alphabetSize];
					StringTokenizer st = new StringTokenizer(line.substring(5)," ",false);
					for (int i=0; i<alphabetSize; ++i) {
						if (!st.hasMoreTokens()) {
							Log.errorln("Invalid inhomogeneous distribution!");
							System.exit(1);
						}
						dist[i] = Double.parseDouble(st.nextToken());
					}
					distributions.add(dist);
				}
				if (line.startsWith("POS ")) {
					if (positions==null) {
						Log.errorln("Invalid inhomogeneous distribution!");
						System.exit(1);
					}
					StringTokenizer st = new StringTokenizer(line.substring(4), ";", false);
					int i = 0;
					while (st.hasMoreTokens()) {
						StringTokenizer st2 = new StringTokenizer(st.nextToken(), ",", false);
						int distIndex = Integer.parseInt(st2.nextToken());
						int multiplicity = 1;
						if (st2.hasMoreTokens()) multiplicity = Integer.parseInt(st2.nextToken());
						if (distIndex==0) {
							i+=multiplicity;
						} else {
							for (int j=0; j<multiplicity; ++j) positions[i++]=distIndex;
						}
					}
					if (i!=positions.length) {
						Log.errorln("Invalid inhomogeneous distribution!");
						System.exit(1);
					}
				}
			}
		} catch (IOException e) {
			Log.errorln("I/O failure while reading inhomogeneous distribution, sorry!");
			System.exit(1);
		}
		return new InhomogeneousDistribution(distributions, positions);
	}
	
	public void writeToFile(File file) {
	   PrintWriter out = null;
      try {
         out = new PrintWriter(new BufferedWriter(new FileWriter(file)));
      } catch (IOException e) {
         Log.errorln("Sorry, could not write character distribution!");
         Log.printException(e);
         System.exit(1);
      }
      
      out.printf("LEN %d%n", positions.length);
      for (int i = 0; i < distributions.size(); ++i) {
         double[] dist = distributions.get(i);
         StringBuilder sb = new StringBuilder("DIST");
         for (int j = 0; j < dist.length; ++j) {
            sb.append(" ").append(dist[j]+"");
         }
         out.println(sb.toString());
      }
      out.flush();
      StringBuilder sb = new StringBuilder("POS ");
      int multiplicity;
      for (int idx = 0; idx < positions.length; ) { // does not increment distIndex
         for (multiplicity = idx + 1; multiplicity < positions.length && positions[multiplicity] == positions[idx]; ++multiplicity) {
            // only go to the next position with a different distribution
         }
         // multiplicity is the first position with a different distribution
         // now calc the number of equal distributions (the multiplicity)
         multiplicity = multiplicity - idx;
         
         sb.append(positions[idx]+"");
         if (multiplicity != 1) {
            sb.append(",").append(multiplicity+"");
         }
         sb.append(";");
         
         // go to next distribution
         idx += multiplicity;
      }
      out.println(sb.toString());
      out.close();
	}
}
